


# ---- Q22, full sample: data preparation ----
#
# The survey data frame is referenced explicitly rather than attach()ed:
# no statement in this section reads bare column names (the stm formulas
# further down receive their variables through `data = out9$meta`), and
# attach() only adds a frozen copy of the columns to the search path.

# Plain alias of the survey data; kept because other code may expect `data`.
data <- COVID_Data_June_27_Treatments

###### Q22

# prepare data
# Build a corpus whose document text is the Q22 column.
data9 <- corpus(COVID_Data_June_27_Treatments, text_field = "Q22")

# Store the raw text as a document variable so it is carried along into
# the metadata produced by convert() below.
docvars(data9)$text <- texts(data9)

# Document-feature matrix: stemmed, punctuation removed, SMART stopwords
# plus a few topic-generic terms dropped, then trimmed to features that
# occur at least 25 times overall. `data9` is reused for the dfm.
data9 <- dfm(
  data9,
  stem = TRUE,
  remove = c(
    stopwords(source = "smart"),
    "pandemic", "virus", "mask", "wear", "respond", "spread"
  ),
  remove_punct = TRUE
) %>%
  dfm_trim(min_termfreq = 25)

# Convert to the documents / vocab / meta list used by the stm calls below.
out9 <- convert(data9, to = "stm")

# Treat the Chinese covariate as categorical.
out9$meta$Chinese <- as.factor(out9$meta$Chinese)


############## How many topics? (full sample) ##############################

# Compare candidate topic counts (5, 7, 10) with searchK(), seeded for
# reproducibility.
set.seed(4)
kResult9 <- searchK(
  documents = out9$documents,
  vocab = out9$vocab,
  K = c(5, 7, 10),
  init.type = "Spectral",
  prevalence = ~ Chinese,
  data = out9$meta
)

# Default diagnostic panels from the searchK result.
plot(kResult9)

# Single-panel scatter of semantic coherence against exclusivity, with
# each point labelled by its K.
par(mfrow = c(1, 1), mar = c(5, 5, 5, 5))
invisible(with(kResult9$results, {
  plot(semcoh, exclus, xlab = "Semantic Coherence", ylab = "Exclusivity")
  text(semcoh, exclus, labels = paste("K", K), pos = 1)
}))

# Tabulate the diagnostics.
knitr::kable(kResult9$results)


# fit models and effect estimates

# 5-topic structural topic model; topic prevalence is allowed to vary with
# the Chinese covariate.
data_10 <- stm(
  documents = out9$documents,
  vocab = out9$vocab,
  data = out9$meta,
  prevalence = ~ Chinese,
  K = 5,
  verbose = FALSE
)

# Seed the RNG immediately before estimateEffect(). The earlier version
# passed `set.seed(4)` as an extra positional argument, where it was bound
# to estimateEffect()'s `uncertainty` parameter; because set.seed() returns
# NULL the default was still used, but only by accident. The metadata
# argument is also spelled out in full instead of relying on partial
# matching of `meta`.
set.seed(4)
prep_10 <- estimateEffect(1:5 ~ Chinese, data_10, metadata = out9$meta)

# Top 10 words per topic under each labelling scheme.
labelTopics(data_10, topics = NULL, n = 10)

# Topic-proportion summary plot with hand-assigned topic names and the
# top 7 FREX words per topic.
par(mfrow = c(1, 1), mar = c(7, 1, 3, .5))
plot(
  data_10,
  xlim = c(0, .7),
  n = 7,
  labeltype = "frex",
  topic.names = c(
    "(1) President's Response (Negative)",
    "(2) Federal Government's Response",
    "(3) Partisan Response",
    "(4) States' Responses",
    "(5) Public's Response"
  ),
  custom.labels = "",
  text.cex = .7
)

# Regression tables (one per topic) for the Chinese covariate.
summary(prep_10)

###################### PLOTS #################################

# One point-estimate panel per topic on a 2 x 3 grid. The five panels share
# every setting except topic number, title and upper x-limit, so they are
# drawn from a single parameterised call.
#
# NOTE(review): the B / SE / p values in the titles are hard-coded text, not
# read from summary(prep_10); re-check them whenever the model is refit.
# "p < 0.00" is kept as written but cannot be literally true -- confirm the
# intended threshold.
# NOTE(review): custom.labels are applied positionally; confirm that the
# order ("Chinese Virus", "COVID-19") matches the order in which the levels
# of Chinese are plotted.
par(mfrow = c(2, 3), mar = c(2.5, 1, 3, .5))

invisible(Map(
  function(topic, panel_title, x_limits) {
    plot(
      prep_10,
      covariate = "Chinese", topics = topic, model = data_10,
      method = "pointestimate", cov.value1 = "0", cov.value2 = "1",
      xlab = "", main = panel_title,
      cex.main = 1.0, cex.lab = 1.5, cex.axis = 0.85,
      xlim = x_limits, labeltype = "custom",
      custom.labels = c("Chinese Virus", "COVID-19")
    )
  },
  1:5,
  c(
    "President's Response \n B = 0.03, SE = 0.01, p < 0.00",
    "Federal Government's Response \n B = 0.00, SE = 0.01, p = 0.96 ",
    # Title previously read "Partisan Response Response" (duplicated word).
    "Partisan Response \n B = 0.00, SE = 0.01, p = 0.52",
    "States' Response \n B = -0.02, SE = 0.01, p = 0.04",
    "Public's Response \n B = -0.02, SE = 0.01, p < 0.00"
  ),
  list(
    c(-.01, .35),
    c(-.01, .3),
    c(-.01, .3),
    c(-.01, .3),
    c(-.01, .3)
  )
))



################### Find Thoughts (full sample)

# Rebuild the Q22 corpus / dfm and refit a 5-topic model (Spectral
# initialisation, at most 150 EM iterations) to pull example responses.
# The data frame is referenced explicitly; attach() is not needed because
# no bare column names are read here.
#
# NOTE(review): `model` is a separate fit from `data_10`; confirm that its
# topic numbering matches before quoting responses under data_10's labels.
data9r <- corpus(COVID_Data_June_27_Treatments, text_field = "Q22")

# Keep the raw response text as a document variable so that it survives
# into out$meta, aligned with the documents that convert() retains.
docvars(data9r)$text <- texts(data9r)

data9r <- dfm(
  data9r,
  stem = TRUE,
  remove = c(
    stopwords(source = "smart"),
    "pandemic", "virus", "respond", "mask", "wear", "spread"
  ),
  remove_punct = TRUE
) %>%
  dfm_trim(min_termfreq = 25)

out <- convert(data9r, to = "stm")

model <- stm(
  out$documents, out$vocab,
  K = 5, max.em.its = 150, data = out$meta,
  init.type = "Spectral", prevalence = ~ Chinese
)

summary(model)

# 25 most representative responses for topic 5. The text is taken from the
# `text` document variable saved above: Q22 was used as the corpus
# text_field, so it is the document text itself rather than a metadata
# column, and `out$meta$Q22` is not where the responses live.
thoughts1 <- findThoughts(model, texts = out$meta$text, topics = 5, n = 25)

head(thoughts1)



##################################################################
##################################################################
##################################################################
# Jason's additions (use as an example!)

# Document-by-topic probability (prevalence) matrix of the fitted model.
# It is taken by name (`theta`) rather than by list position, so the code
# does not depend on the ordering of the elements returned by stm().
data_10_prevalence_matrix <- data_10$theta

# Histograms of topic probabilities, one panel per topic (3 x 2 grid).
par(mfrow = c(3, 2))
invisible(Map(
  function(topic, panel_title) {
    hist(data_10_prevalence_matrix[, topic],
         main = panel_title, xlab = "Probability")
  },
  1:5,
  c(
    "(1) President's Response (Negative)",
    "(2) Federal Government's Response",
    "(3) Partisan Response",
    "(4) States' Response",
    "(5) Public's Response"
  )
))

# Maximum topic probability per document: the probability that is used to
# classify the document as a single topic. The earlier loop read from
# `data_6_prevalence_matrix`, which is never defined in this script; the
# row-wise maximum is now taken from this model's own matrix.
data_10_max_topic <- apply(data_10_prevalence_matrix, 1, max)

# Legacy name kept so that code written against the old variable still runs.
data_6_max_topic <- data_10_max_topic

# Histogram of the maximum topic probability (fills the sixth panel).
hist(data_10_max_topic,
     main = "Maximum topic probability", xlab = "Probability")












################################### Republican replication (H3)

# Restrict the sample to rows where Conservative equals "1". The column is
# read from the data frame directly instead of through attach(), so the
# subset cannot silently pick up a stale or masked copy of `Conservative`
# from the search path. which() drops rows where the comparison is NA.
COVID_Data_June_27_Treatments_Conservative <- COVID_Data_June_27_Treatments[
  which(COVID_Data_June_27_Treatments[["Conservative"]] == "1"),
]

###### Q22

# prepare data
# Corpus whose document text is the Q22 column of the subset.
data9r <- corpus(COVID_Data_June_27_Treatments_Conservative, text_field = "Q22")

# Keep the raw text as a document variable so it is carried into the
# metadata produced by convert().
docvars(data9r)$text <- texts(data9r)

# Same preprocessing as the full-sample analysis: stemming, punctuation
# removal, SMART stopwords plus topic-generic terms, minimum term
# frequency of 25.
data9r <- dfm(
  data9r,
  stem = TRUE,
  remove = c(
    stopwords(source = "smart"),
    "pandemic", "virus", "mask", "wear", "respond", "spread"
  ),
  remove_punct = TRUE
) %>%
  dfm_trim(min_termfreq = 25)

out9r <- convert(data9r, to = "stm")

# Treat the Chinese covariate as categorical.
out9r$meta$Chinese <- as.factor(out9r$meta$Chinese)

summary(out9r)

############## How many topics? (conservative subsample) ###################

# Compare candidate topic counts (3, 4, 5) with searchK(), seeded for
# reproducibility.
set.seed(4)
kResult9r <- searchK(
  documents = out9r$documents,
  vocab = out9r$vocab,
  K = c(3, 4, 5),
  init.type = "Spectral",
  prevalence = ~ Chinese,
  data = out9r$meta
)

# Default diagnostic panels from the searchK result.
plot(kResult9r)

# Single-panel scatter of semantic coherence against exclusivity, with
# each point labelled by its K.
par(mfrow = c(1, 1), mar = c(5, 5, 5, 5))
invisible(with(kResult9r$results, {
  plot(semcoh, exclus, xlab = "Semantic Coherence", ylab = "Exclusivity")
  text(semcoh, exclus, labels = paste("K", K), pos = 1)
}))

# Tabulate the diagnostics.
knitr::kable(kResult9r$results)


# fit models and effect estimates

# 5-topic structural topic model on the conservative subsample; topic
# prevalence is allowed to vary with the Chinese covariate.
data_10r <- stm(
  documents = out9r$documents,
  vocab = out9r$vocab,
  data = out9r$meta,
  prevalence = ~ Chinese,
  K = 5,
  verbose = FALSE
)

# Seed the RNG immediately before estimateEffect(). The earlier version
# passed `set.seed(4)` as an extra positional argument, where it was bound
# to estimateEffect()'s `uncertainty` parameter; because set.seed() returns
# NULL the default was still used, but only by accident. The metadata
# argument is also spelled out in full instead of relying on partial
# matching of `meta`.
set.seed(4)
prep_10r <- estimateEffect(1:5 ~ Chinese, data_10r, metadata = out9r$meta)

# Top 10 words per topic under each labelling scheme.
labelTopics(data_10r, topics = NULL, n = 10)

# Topic-proportion summary plot with hand-assigned topic names and the
# top 7 FREX words per topic.
par(mfrow = c(1, 1), mar = c(7, 1, 3, .5))
plot(
  data_10r,
  xlim = c(0, 1),
  n = 7,
  labeltype = "frex",
  topic.names = c(
    "(1) States' Response",
    "(2) Media's Response",
    "(3) Democrat States' Response",
    "(4) Democrat Governors' Response",
    "(5) President's Response"
  ),
  custom.labels = " ",
  text.cex = .6
)

# Regression tables (one per topic) for the Chinese covariate.
summary(prep_10r)

###################### PLOTS #################################

# One point-estimate panel per topic on a 2 x 3 grid. The panels differ
# only in topic number, title and x-limits, so a single parameterised call
# is mapped over those three inputs.
#
# NOTE(review): the B / SE / p values in the titles are hard-coded text, not
# read from summary(prep_10r); re-check them whenever the model is refit.
par(mfrow = c(2, 3), mar = c(2.5, 1, 3, .5))

invisible(Map(
  function(topic, panel_title, x_limits) {
    plot(
      prep_10r,
      covariate = "Chinese", topics = topic, model = data_10r,
      method = "pointestimate", cov.value1 = "0", cov.value2 = "1",
      xlab = "", main = panel_title,
      cex.main = 1.0, cex.lab = 1.5, cex.axis = 0.85,
      xlim = x_limits, labeltype = "custom",
      custom.labels = c('Chinese Virus', 'COVID-19')
    )
  },
  1:5,
  c(
    "States' Response \n B = 0.004, SE = 0.01, p = 0.764",
    "Media's Response \n B = -0.04, SE = 0.01, p = 0.007 ",
    "Democrat States' Response \n B = -0.03, SE = 0.01, p = 0.012",
    "Democrat Governors' Response \n B = 0.02, SE = 0.01, p = 0.07",
    "President's Response \n B = -0.05, SE = 0.02, p = 0.004"
  ),
  list(
    c(-.01, .25),
    c(-.01, .4),
    c(-.01, .4),
    c(-.01, .4),
    c(-.01, .4)
  )
))




################### Find Thoughts (conservative subsample)

# Rebuild the Q22 corpus / dfm for the conservative subsample and refit a
# 5-topic model (Spectral initialisation, at most 150 EM iterations) to
# pull example responses. The data frame is referenced explicitly;
# attach() is not needed because no bare column names are read here.
#
# NOTE(review): `model` is a separate fit from `data_10r`; confirm that its
# topic numbering matches before quoting responses under data_10r's labels.
data9r <- corpus(COVID_Data_June_27_Treatments_Conservative, text_field = "Q22")

# Keep the raw response text as a document variable so that it survives
# into out$meta, aligned with the documents that convert() retains.
docvars(data9r)$text <- texts(data9r)

data9r <- dfm(
  data9r,
  stem = TRUE,
  remove = c(
    stopwords(source = "smart"),
    "pandemic", "virus", "respond", "mask", "wear", "spread"
  ),
  remove_punct = TRUE
) %>%
  dfm_trim(min_termfreq = 25)

out <- convert(data9r, to = "stm")

model <- stm(
  out$documents, out$vocab,
  K = 5, max.em.its = 150, data = out$meta,
  init.type = "Spectral", prevalence = ~ Chinese
)

summary(model)

# 35 most representative responses for topic 5. The text is taken from the
# `text` document variable saved above: Q22 was used as the corpus
# text_field, so it is the document text itself rather than a metadata
# column, and `out$meta$Q22` is not where the responses live.
thoughts1 <- findThoughts(model, texts = out$meta$text, topics = 5, n = 35)

head(thoughts1)



##################################################################
##################################################################
##################################################################
# Jason's additions (use as an example!)

# Document-by-topic probability (prevalence) matrix of the fitted model.
# It is taken by name (`theta`) rather than by list position, so the code
# does not depend on the ordering of the elements returned by stm().
data_10r_prevalence_matrix <- data_10r$theta

# Histograms of topic probabilities, one panel per topic (3 x 2 grid).
par(mfrow = c(3, 2))
invisible(Map(
  function(topic, panel_title) {
    hist(data_10r_prevalence_matrix[, topic],
         main = panel_title, xlab = "Probability")
  },
  1:5,
  c(
    "(1) States' Response",
    "(2) Media's Response",
    "(3) Democrat States' Response",
    "(4) Democrat Governors' Response",
    "(5) President's Response"
  )
))



